{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1: Computational Inductive Analysis\n",
    "\n",
    "#### Analysis 1: Difference of Proportions\n",
    "\n",
    "The last two cells produce the output used in Table 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>doc</th>\n",
       "      <th>city</th>\n",
       "      <th>publication</th>\n",
       "      <th>date</th>\n",
       "      <th>word_count</th>\n",
       "      <th>org</th>\n",
       "      <th>identifier</th>\n",
       "      <th>wave</th>\n",
       "      <th>text_string</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>notessecondyear_70.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>553</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1 1 1 1 10 11 2 2 2 2 3 3 3 4 5 6 7 8 9 A An...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>chicago.cwlu_womankind.1971.11.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>890</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>411 93 Actually Alice American American Any As...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>nyc.masses_1916.04.21.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1916</td>\n",
       "      <td>425</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>All Anarchist Anarchist And Birth Birth Birth ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>nyc.redstockings.1973.mainardi.marriagequestio...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>972</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1968 1968 50s 60s Although Although American A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>chicago.cwlu_womankind.1972.01.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>39</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>1972 5 Ghots I January Womankind a bind by cro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>notesfirstyear_30.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notesfirstyear</td>\n",
       "      <td>1968</td>\n",
       "      <td>442</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>12 12 15 1868 1868 1968 28 A AUNT All Anybody ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>chicago.cwlu_womankind.1972.05.14.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>976</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1970 2 2 3 4 4 5 6 7 8 A AT Also Also Amer A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.programforcons...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>785</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>1 2 3 A A A APPENDIX And CONSCIOUSNESSRAISING ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>chicago.cwlu_womankind.1972.11.11.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>985</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1867 1972 A AND ARTICLES Adopt Affiar All Amaz...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.20.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>369</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>Above CWLU CWLU CWLU Chicago Chicago Discus Li...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>chicago.cwlu_womankind.1972.01.12.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>938</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>10 12volt 15 6volt 6volt 6volt A A Another As ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>chicago.cwlu_womankind.1972.09.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1591</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "      <td>1930s 1930s 1auqhter 30s 5th 6th 7871786 A Act...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>chicago.cwlu_womankind.1973.04.16.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>689</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>13</td>\n",
       "      <td>2</td>\n",
       "      <td>25 29 2ND 4 656 A Any Any April As Barry But C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>chicago.cwlu_womankind.1972.12.08.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1130</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>14</td>\n",
       "      <td>2</td>\n",
       "      <td>10 1020 1161 12 1300s 1500 1793 1930s 1950s 19...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>chicago.cwlu_womankind.1973.11.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>899</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>15</td>\n",
       "      <td>2</td>\n",
       "      <td>14 30 ABORTION AFLCIO AFLCIO AND Abortions Act...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>chicago.cwlu_womankind.1973.06.08.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1106</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>16</td>\n",
       "      <td>2</td>\n",
       "      <td>112940 1970 1971 1972 1972 1974 26000 3 341840...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>chicago.cwlu_womankind.1972.06.13.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1573</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>17</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1100015000 13 13 1318 18 1800036000 1972 1eV...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>chicago.cwlu_womankind.1972.12.05.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1439</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "      <td>1972 2 2 3 4 438211 552 575 79 852 A A A A A A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>nyc.redstockings.1973.hanisch.mensliberation-3...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1094</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "      <td>1000 150 375 6 7 74 And Apparently As As Avenu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>nyc.redstockings.1973.hanisch.workingconscious...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1011</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>2 3 3 4 4 Although Being But Do Do FROM Gradua...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>notessecondyear_25.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>974</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>21</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1 1570 2 3 369 4 5 5 50 6 6 7 7 8 92169 95 A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.whatwereallywa...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>959</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>22</td>\n",
       "      <td>2</td>\n",
       "      <td>1971 26 All And And And And August Being But C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>chicago.cwlu_womankind.1971.12.03.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>137</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>1970 A A AGGRESS ALL AND ANOTHER AS BE BEFORE ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.61_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>717</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1914 1915 Abbott Addams Addams Addams Aletta A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>chicago.cwlu_womankind.1973.09.04.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>878</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>25</td>\n",
       "      <td>2</td>\n",
       "      <td>100000 10th 13 160 1950s 1972 1972 20hour 21 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>chicago.cwlu_womankind.1973.02.15.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>854</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>26</td>\n",
       "      <td>2</td>\n",
       "      <td>1 18611939 1880s 1902 1902 1904 1906 1915 1915...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>nyc.redstockings.1973.willis.conservatismofms-...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1154</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>27</td>\n",
       "      <td>2</td>\n",
       "      <td>171 1970 A Alpert Alpert Alpert Alpert Alperts...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>nyc.redstockings.1973.serre.psychologique-4.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>318</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1 197 1971 20 3 3 5 65 And As Be Brule But Fem...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.60_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>699</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>29</td>\n",
       "      <td>1</td>\n",
       "      <td>12060 130 1915 30 A Advancement Art Associatio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>chicago.hullhouse_bulletin.1916.01.43_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1916</td>\n",
       "      <td>362</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>30</td>\n",
       "      <td>1</td>\n",
       "      <td>A A Al Augusta Beauty Bowen Bowen Childrens Ch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>notessecondyear_8.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>1023</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>994</td>\n",
       "      <td>2</td>\n",
       "      <td>A A American And And And Applicant But But But...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>chicago.hullhouse_bulletin.1906.09.47_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1906</td>\n",
       "      <td>730</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>995</td>\n",
       "      <td>1</td>\n",
       "      <td>11 111 11th 1907 1907 19th 1st 5 5 7 7th 830 9...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>chicago.hullhouse_bulletin.1901.05.10_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1901</td>\n",
       "      <td>451</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>996</td>\n",
       "      <td>1</td>\n",
       "      <td>A AND ARTS Afternoons And Arts Bohemian Buildi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>nyc.masses_1914.03.07.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1914</td>\n",
       "      <td>212</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>997</td>\n",
       "      <td>1</td>\n",
       "      <td>600000 A A ARE Ahout All And B Being COMPARATI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>chicago.hullhouse_bulletin.1913.01.28_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1913</td>\n",
       "      <td>352</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>998</td>\n",
       "      <td>1</td>\n",
       "      <td>A An An An As At Balaleika Balaleika Balls Bow...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.17.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>819</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>999</td>\n",
       "      <td>2</td>\n",
       "      <td>13 6 A AFTER All And And And And Any BabysitAn...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>chicago.hullhouse_bulletin.1900.08.13_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1900</td>\n",
       "      <td>908</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "      <td>26th 7th 7th 7th AND Addams Association Associ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1000</th>\n",
       "      <td>nyc.redstockings.1973.brooke.sexroletheory-1.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>965</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1001</td>\n",
       "      <td>2</td>\n",
       "      <td>According Although And Anne Are As Betty Blami...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1001</th>\n",
       "      <td>nyc.redstockings.1973.price.keepingwomenout-05...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>261</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1002</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1000 1000 1010 1010 1020 1050 1900 1940 1960...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>chicago.cwlu_womankind.1972.07.12.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>1748</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1003</td>\n",
       "      <td>2</td>\n",
       "      <td>1 10 10 14 15 15 16 1970 2 2 29 3 34 37270 4 4...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1003</th>\n",
       "      <td>notessecondyear_43.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>895</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1004</td>\n",
       "      <td>2</td>\n",
       "      <td>10 A Adult As At Because Gradually He Hindu Hi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1004</th>\n",
       "      <td>nyc.masses_1915.11.08.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1915</td>\n",
       "      <td>1372</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>1005</td>\n",
       "      <td>1</td>\n",
       "      <td>And And And And Because But But Do For I I I I...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1005</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>417</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1006</td>\n",
       "      <td>2</td>\n",
       "      <td>1968 1970 1970 AND Above America Among Atlanti...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1006</th>\n",
       "      <td>chicago.cwlu_womankind.1972.03.03.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>691</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1007</td>\n",
       "      <td>2</td>\n",
       "      <td>3 37 9 A Act And Association August British Bu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1007</th>\n",
       "      <td>nyc.redstockings.1973.leon.dirtytricks-03.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1208</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1008</td>\n",
       "      <td>2</td>\n",
       "      <td>15year 1953 1957 195960 195962 1965 1965 1967 ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1008</th>\n",
       "      <td>chicago.cwlu_womankind.1973.02.11.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1912</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1009</td>\n",
       "      <td>2</td>\n",
       "      <td>170 19 197172 1972 1972 1972 1972 1973 20 200 ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1009</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1218</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1010</td>\n",
       "      <td>2</td>\n",
       "      <td>100 1959 Anthony Anthony Anthony Anthony Antho...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1010</th>\n",
       "      <td>chicago.cwlu_womankind.1972.02.22.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>599</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1011</td>\n",
       "      <td>2</td>\n",
       "      <td>100 12 1250 180 1910 1910 1969 1970 1970 2 2 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1011</th>\n",
       "      <td>chicago.hullhouse_bulletin.1903.01.18_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1903</td>\n",
       "      <td>750</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1012</td>\n",
       "      <td>1</td>\n",
       "      <td>11 12 1903 5 7 8 9 A AJAX APARTMENTS Achilles ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1012</th>\n",
       "      <td>chicago.cwlu_womankind.1973.09.06.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1973</td>\n",
       "      <td>1158</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1013</td>\n",
       "      <td>2</td>\n",
       "      <td>1000year 12 2 212 23 5000 60s Action Again Age...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <td>chicago.cwlu_womankind.1972.07.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>46</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1014</td>\n",
       "      <td>2</td>\n",
       "      <td>1 10 11 11 14 1972 25 8 Chicago JULY Liberatio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1014</th>\n",
       "      <td>nyc.redstockings.1973.price.keepingwomenout-11...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>815</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1015</td>\n",
       "      <td>2</td>\n",
       "      <td>100000 10786 131 1380000year 1500000year 15000...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1015</th>\n",
       "      <td>notessecondyear_18.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>notessecondyear</td>\n",
       "      <td>1969</td>\n",
       "      <td>1059</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1016</td>\n",
       "      <td>2</td>\n",
       "      <td>2 And And But But But But For He His I I In In...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1016</th>\n",
       "      <td>nyc.redstockings.1973.sarachild.powerofhistory...</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1142</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1017</td>\n",
       "      <td>2</td>\n",
       "      <td>2 A A A A All And And And As As As As Because ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1017</th>\n",
       "      <td>nyc.masses_1915.11.09.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>masses</td>\n",
       "      <td>1915</td>\n",
       "      <td>1643</td>\n",
       "      <td>heterodoxy</td>\n",
       "      <td>1018</td>\n",
       "      <td>1</td>\n",
       "      <td>A A A A A A A Against Ah Alice All Allies And ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1018</th>\n",
       "      <td>chicago.hullhouse_bulletin.1910.05.36_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1910</td>\n",
       "      <td>270</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1019</td>\n",
       "      <td>1</td>\n",
       "      <td>A Another Association Association Christmas Ch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1019</th>\n",
       "      <td>chicago.hullhouse_bulletin.1905.08.22_article.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>hullhouse_bulletin</td>\n",
       "      <td>1905</td>\n",
       "      <td>845</td>\n",
       "      <td>hullhouse</td>\n",
       "      <td>1020</td>\n",
       "      <td>1</td>\n",
       "      <td>100 106 1101tionse 1896 1897 1c6 1st 43 A Aid ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1020</th>\n",
       "      <td>chicago.cwlu_womankind.1972.04.01.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1972</td>\n",
       "      <td>20</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1021</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1972 8 April Home Womankind and girls is no ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1021</th>\n",
       "      <td>chicago.cwlu_womankind.1971.12.14.txt</td>\n",
       "      <td>chicago</td>\n",
       "      <td>cwlu_womankind</td>\n",
       "      <td>1971</td>\n",
       "      <td>1195</td>\n",
       "      <td>cwlu</td>\n",
       "      <td>1022</td>\n",
       "      <td>2</td>\n",
       "      <td>95 Adn And And And Answering At At Because Bei...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1022</th>\n",
       "      <td>nyc.redstockings.1973.leon.conditioningline-3.txt</td>\n",
       "      <td>nyc</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1973</td>\n",
       "      <td>1119</td>\n",
       "      <td>redstockings</td>\n",
       "      <td>1023</td>\n",
       "      <td>2</td>\n",
       "      <td>1 1970 1971 2 3 4 5 6 A A A A According Aug Br...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1023 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    doc     city  \\\n",
       "0                                notessecondyear_70.txt      nyc   \n",
       "1                 chicago.cwlu_womankind.1971.11.06.txt  chicago   \n",
       "2                             nyc.masses_1916.04.21.txt      nyc   \n",
       "3     nyc.redstockings.1973.mainardi.marriagequestio...      nyc   \n",
       "4                 chicago.cwlu_womankind.1972.01.01.txt  chicago   \n",
       "5                                 notesfirstyear_30.txt      nyc   \n",
       "6                 chicago.cwlu_womankind.1972.05.14.txt  chicago   \n",
       "7     nyc.redstockings.1973.sarachild.programforcons...      nyc   \n",
       "8                 chicago.cwlu_womankind.1972.11.11.txt  chicago   \n",
       "9                 chicago.cwlu_womankind.1972.03.20.txt  chicago   \n",
       "10                chicago.cwlu_womankind.1972.01.12.txt  chicago   \n",
       "11                chicago.cwlu_womankind.1972.09.06.txt  chicago   \n",
       "12                chicago.cwlu_womankind.1973.04.16.txt  chicago   \n",
       "13                chicago.cwlu_womankind.1972.12.08.txt  chicago   \n",
       "14                chicago.cwlu_womankind.1973.11.06.txt  chicago   \n",
       "15                chicago.cwlu_womankind.1973.06.08.txt  chicago   \n",
       "16                chicago.cwlu_womankind.1972.06.13.txt  chicago   \n",
       "17                chicago.cwlu_womankind.1972.12.05.txt  chicago   \n",
       "18    nyc.redstockings.1973.hanisch.mensliberation-3...      nyc   \n",
       "19    nyc.redstockings.1973.hanisch.workingconscious...      nyc   \n",
       "20                               notessecondyear_25.txt      nyc   \n",
       "21    nyc.redstockings.1973.sarachild.whatwereallywa...      nyc   \n",
       "22                chicago.cwlu_womankind.1971.12.03.txt  chicago   \n",
       "23    chicago.hullhouse_bulletin.1916.01.61_article.txt  chicago   \n",
       "24                chicago.cwlu_womankind.1973.09.04.txt  chicago   \n",
       "25                chicago.cwlu_womankind.1973.02.15.txt  chicago   \n",
       "26    nyc.redstockings.1973.willis.conservatismofms-...      nyc   \n",
       "27      nyc.redstockings.1973.serre.psychologique-4.txt      nyc   \n",
       "28    chicago.hullhouse_bulletin.1916.01.60_article.txt  chicago   \n",
       "29    chicago.hullhouse_bulletin.1916.01.43_article.txt  chicago   \n",
       "...                                                 ...      ...   \n",
       "993                               notessecondyear_8.txt      nyc   \n",
       "994   chicago.hullhouse_bulletin.1906.09.47_article.txt  chicago   \n",
       "995   chicago.hullhouse_bulletin.1901.05.10_article.txt  chicago   \n",
       "996                           nyc.masses_1914.03.07.txt      nyc   \n",
       "997   chicago.hullhouse_bulletin.1913.01.28_article.txt  chicago   \n",
       "998               chicago.cwlu_womankind.1972.03.17.txt  chicago   \n",
       "999   chicago.hullhouse_bulletin.1900.08.13_article.txt  chicago   \n",
       "1000   nyc.redstockings.1973.brooke.sexroletheory-1.txt      nyc   \n",
       "1001  nyc.redstockings.1973.price.keepingwomenout-05...      nyc   \n",
       "1002              chicago.cwlu_womankind.1972.07.12.txt  chicago   \n",
       "1003                             notessecondyear_43.txt      nyc   \n",
       "1004                          nyc.masses_1915.11.08.txt      nyc   \n",
       "1005  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1006              chicago.cwlu_womankind.1972.03.03.txt  chicago   \n",
       "1007      nyc.redstockings.1973.leon.dirtytricks-03.txt      nyc   \n",
       "1008              chicago.cwlu_womankind.1973.02.11.txt  chicago   \n",
       "1009  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1010              chicago.cwlu_womankind.1972.02.22.txt  chicago   \n",
       "1011  chicago.hullhouse_bulletin.1903.01.18_article.txt  chicago   \n",
       "1012              chicago.cwlu_womankind.1973.09.06.txt  chicago   \n",
       "1013              chicago.cwlu_womankind.1972.07.01.txt  chicago   \n",
       "1014  nyc.redstockings.1973.price.keepingwomenout-11...      nyc   \n",
       "1015                             notessecondyear_18.txt      nyc   \n",
       "1016  nyc.redstockings.1973.sarachild.powerofhistory...      nyc   \n",
       "1017                          nyc.masses_1915.11.09.txt      nyc   \n",
       "1018  chicago.hullhouse_bulletin.1910.05.36_article.txt  chicago   \n",
       "1019  chicago.hullhouse_bulletin.1905.08.22_article.txt  chicago   \n",
       "1020              chicago.cwlu_womankind.1972.04.01.txt  chicago   \n",
       "1021              chicago.cwlu_womankind.1971.12.14.txt  chicago   \n",
       "1022  nyc.redstockings.1973.leon.conditioningline-3.txt      nyc   \n",
       "\n",
       "             publication  date  word_count           org  identifier  wave  \\\n",
       "0        notessecondyear  1969         553  redstockings           1     2   \n",
       "1         cwlu_womankind  1971         890          cwlu           2     2   \n",
       "2                 masses  1916         425    heterodoxy           3     1   \n",
       "3           redstockings  1973         972  redstockings           4     2   \n",
       "4         cwlu_womankind  1972          39          cwlu           5     2   \n",
       "5         notesfirstyear  1968         442  redstockings           6     2   \n",
       "6         cwlu_womankind  1972         976          cwlu           7     2   \n",
       "7           redstockings  1973         785  redstockings           8     2   \n",
       "8         cwlu_womankind  1972         985          cwlu           9     2   \n",
       "9         cwlu_womankind  1972         369          cwlu          10     2   \n",
       "10        cwlu_womankind  1972         938          cwlu          11     2   \n",
       "11        cwlu_womankind  1972        1591          cwlu          12     2   \n",
       "12        cwlu_womankind  1973         689          cwlu          13     2   \n",
       "13        cwlu_womankind  1972        1130          cwlu          14     2   \n",
       "14        cwlu_womankind  1973         899          cwlu          15     2   \n",
       "15        cwlu_womankind  1973        1106          cwlu          16     2   \n",
       "16        cwlu_womankind  1972        1573          cwlu          17     2   \n",
       "17        cwlu_womankind  1972        1439          cwlu          18     2   \n",
       "18          redstockings  1973        1094  redstockings          19     2   \n",
       "19          redstockings  1973        1011  redstockings          20     2   \n",
       "20       notessecondyear  1969         974  redstockings          21     2   \n",
       "21          redstockings  1973         959  redstockings          22     2   \n",
       "22        cwlu_womankind  1971         137          cwlu          23     2   \n",
       "23    hullhouse_bulletin  1916         717     hullhouse          24     1   \n",
       "24        cwlu_womankind  1973         878          cwlu          25     2   \n",
       "25        cwlu_womankind  1973         854          cwlu          26     2   \n",
       "26          redstockings  1973        1154  redstockings          27     2   \n",
       "27          redstockings  1973         318  redstockings          28     2   \n",
       "28    hullhouse_bulletin  1916         699     hullhouse          29     1   \n",
       "29    hullhouse_bulletin  1916         362     hullhouse          30     1   \n",
       "...                  ...   ...         ...           ...         ...   ...   \n",
       "993      notessecondyear  1969        1023  redstockings         994     2   \n",
       "994   hullhouse_bulletin  1906         730     hullhouse         995     1   \n",
       "995   hullhouse_bulletin  1901         451     hullhouse         996     1   \n",
       "996               masses  1914         212    heterodoxy         997     1   \n",
       "997   hullhouse_bulletin  1913         352     hullhouse         998     1   \n",
       "998       cwlu_womankind  1972         819          cwlu         999     2   \n",
       "999   hullhouse_bulletin  1900         908     hullhouse        1000     1   \n",
       "1000        redstockings  1973         965  redstockings        1001     2   \n",
       "1001        redstockings  1973         261  redstockings        1002     2   \n",
       "1002      cwlu_womankind  1972        1748          cwlu        1003     2   \n",
       "1003     notessecondyear  1969         895  redstockings        1004     2   \n",
       "1004              masses  1915        1372    heterodoxy        1005     1   \n",
       "1005        redstockings  1973         417  redstockings        1006     2   \n",
       "1006      cwlu_womankind  1972         691          cwlu        1007     2   \n",
       "1007        redstockings  1973        1208  redstockings        1008     2   \n",
       "1008      cwlu_womankind  1973        1912          cwlu        1009     2   \n",
       "1009        redstockings  1973        1218  redstockings        1010     2   \n",
       "1010      cwlu_womankind  1972         599          cwlu        1011     2   \n",
       "1011  hullhouse_bulletin  1903         750     hullhouse        1012     1   \n",
       "1012      cwlu_womankind  1973        1158          cwlu        1013     2   \n",
       "1013      cwlu_womankind  1972          46          cwlu        1014     2   \n",
       "1014        redstockings  1973         815  redstockings        1015     2   \n",
       "1015     notessecondyear  1969        1059  redstockings        1016     2   \n",
       "1016        redstockings  1973        1142  redstockings        1017     2   \n",
       "1017              masses  1915        1643    heterodoxy        1018     1   \n",
       "1018  hullhouse_bulletin  1910         270     hullhouse        1019     1   \n",
       "1019  hullhouse_bulletin  1905         845     hullhouse        1020     1   \n",
       "1020      cwlu_womankind  1972          20          cwlu        1021     2   \n",
       "1021      cwlu_womankind  1971        1195          cwlu        1022     2   \n",
       "1022        redstockings  1973        1119  redstockings        1023     2   \n",
       "\n",
       "                                            text_string  \n",
       "0     1 1 1 1 1 10 11 2 2 2 2 3 3 3 4 5 6 7 8 9 A An...  \n",
       "1     411 93 Actually Alice American American Any As...  \n",
       "2     All Anarchist Anarchist And Birth Birth Birth ...  \n",
       "3     1968 1968 50s 60s Although Although American A...  \n",
       "4     1972 5 Ghots I January Womankind a bind by cro...  \n",
       "5     12 12 15 1868 1868 1968 28 A AUNT All Anybody ...  \n",
       "6     1 1970 2 2 3 4 4 5 6 7 8 A AT Also Also Amer A...  \n",
       "7     1 2 3 A A A APPENDIX And CONSCIOUSNESSRAISING ...  \n",
       "8     1867 1972 A AND ARTICLES Adopt Affiar All Amaz...  \n",
       "9     Above CWLU CWLU CWLU Chicago Chicago Discus Li...  \n",
       "10    10 12volt 15 6volt 6volt 6volt A A Another As ...  \n",
       "11    1930s 1930s 1auqhter 30s 5th 6th 7871786 A Act...  \n",
       "12    25 29 2ND 4 656 A Any Any April As Barry But C...  \n",
       "13    10 1020 1161 12 1300s 1500 1793 1930s 1950s 19...  \n",
       "14    14 30 ABORTION AFLCIO AFLCIO AND Abortions Act...  \n",
       "15    112940 1970 1971 1972 1972 1974 26000 3 341840...  \n",
       "16    1 1100015000 13 13 1318 18 1800036000 1972 1eV...  \n",
       "17    1972 2 2 3 4 438211 552 575 79 852 A A A A A A...  \n",
       "18    1000 150 375 6 7 74 And Apparently As As Avenu...  \n",
       "19    2 3 3 4 4 Although Being But Do Do FROM Gradua...  \n",
       "20    1 1 1570 2 3 369 4 5 5 50 6 6 7 7 8 92169 95 A...  \n",
       "21    1971 26 All And And And And August Being But C...  \n",
       "22    1970 A A AGGRESS ALL AND ANOTHER AS BE BEFORE ...  \n",
       "23    1914 1915 Abbott Addams Addams Addams Aletta A...  \n",
       "24    100000 10th 13 160 1950s 1972 1972 20hour 21 2...  \n",
       "25    1 18611939 1880s 1902 1902 1904 1906 1915 1915...  \n",
       "26    171 1970 A Alpert Alpert Alpert Alpert Alperts...  \n",
       "27    1 197 1971 20 3 3 5 65 And As Be Brule But Fem...  \n",
       "28    12060 130 1915 30 A Advancement Art Associatio...  \n",
       "29    A A Al Augusta Beauty Bowen Bowen Childrens Ch...  \n",
       "...                                                 ...  \n",
       "993   A A American And And And Applicant But But But...  \n",
       "994   11 111 11th 1907 1907 19th 1st 5 5 7 7th 830 9...  \n",
       "995   A AND ARTS Afternoons And Arts Bohemian Buildi...  \n",
       "996   600000 A A ARE Ahout All And B Being COMPARATI...  \n",
       "997   A An An An As At Balaleika Balaleika Balls Bow...  \n",
       "998   13 6 A AFTER All And And And And Any BabysitAn...  \n",
       "999   26th 7th 7th 7th AND Addams Association Associ...  \n",
       "1000  According Although And Anne Are As Betty Blami...  \n",
       "1001  1 1000 1000 1010 1010 1020 1050 1900 1940 1960...  \n",
       "1002  1 10 10 14 15 15 16 1970 2 2 29 3 34 37270 4 4...  \n",
       "1003  10 A Adult As At Because Gradually He Hindu Hi...  \n",
       "1004  And And And And Because But But Do For I I I I...  \n",
       "1005  1968 1970 1970 AND Above America Among Atlanti...  \n",
       "1006  3 37 9 A Act And Association August British Bu...  \n",
       "1007  15year 1953 1957 195960 195962 1965 1965 1967 ...  \n",
       "1008  170 19 197172 1972 1972 1972 1972 1973 20 200 ...  \n",
       "1009  100 1959 Anthony Anthony Anthony Anthony Antho...  \n",
       "1010  100 12 1250 180 1910 1910 1969 1970 1970 2 2 2...  \n",
       "1011  11 12 1903 5 7 8 9 A AJAX APARTMENTS Achilles ...  \n",
       "1012  1000year 12 2 212 23 5000 60s Action Again Age...  \n",
       "1013  1 10 11 11 14 1972 25 8 Chicago JULY Liberatio...  \n",
       "1014  100000 10786 131 1380000year 1500000year 15000...  \n",
       "1015  2 And And But But But But For He His I I In In...  \n",
       "1016  2 A A A A All And And And As As As As Because ...  \n",
       "1017  A A A A A A A Against Ah Alice All Allies And ...  \n",
       "1018  A Another Association Association Christmas Ch...  \n",
       "1019  100 106 1101tionse 1896 1897 1c6 1st 43 A Aid ...  \n",
       "1020  1 1972 8 April Home Womankind and girls is no ...  \n",
       "1021  95 Adn And And And Answering At At Because Bei...  \n",
       "1022  1 1970 1971 2 3 4 5 6 A A A A According Aug Br...  \n",
       "\n",
       "[1023 rows x 9 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "\n",
    "#read in data\n",
    "df = pandas.read_csv(\"../data/comparativewomensmovement_dataset.csv\", sep='\\t', index_col=0, encoding='utf-8')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#concatenate the documents from each organization together, creaing four strings\n",
    "\n",
    "redstockings = df[df['org']=='redstockings']\n",
    "redstockings_string = ' '.join(str(s) for s in redstockings['text_string'].tolist())\n",
    "cwlu = df[df['org']=='cwlu']\n",
    "cwlu_string = ' '.join(str(s) for s in cwlu['text_string'].tolist())\n",
    "heterodoxy = df[df['org']=='heterodoxy']\n",
    "heterodoxy_string = ' '.join(str(s) for s in heterodoxy['text_string'].tolist())\n",
    "hullhouse = df[df['org']=='hullhouse']\n",
    "hullhouse_string = ' '.join(str(s) for s in hullhouse['text_string'].tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create the CountVectorizer instance that the two comparison cells below fit on each pair of strings.\n",
    "# stop_words=\"english\" asks scikit-learn to remove the words on its built-in English stop-word list.\n",
    "countvec = CountVectorizer(stop_words=\"english\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The next two cells produce the output used in Table 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "movement                0.007288\n",
       "women                   0.005573\n",
       "men                     0.004454\n",
       "radical                 0.003999\n",
       "feminist                0.003678\n",
       "male                    0.003574\n",
       "political               0.003177\n",
       "history                 0.002825\n",
       "womens                  0.002474\n",
       "feminism                0.002097\n",
       "revolution              0.001655\n",
       "love                    0.001603\n",
       "feminists               0.001549\n",
       "left                    0.001534\n",
       "power                   0.001517\n",
       "oppression              0.001495\n",
       "class                   0.001475\n",
       "female                  0.001473\n",
       "personal                0.001396\n",
       "woman                   0.001383\n",
       "really                  0.001345\n",
       "consciousness           0.001318\n",
       "consciousnessraising    0.001316\n",
       "group                   0.001306\n",
       "theory                  0.001295\n",
       "groups                  0.001292\n",
       "action                  0.001280\n",
       "new                     0.001274\n",
       "oppressed               0.001151\n",
       "supremacy               0.001130\n",
       "                          ...   \n",
       "pay                    -0.001024\n",
       "help                   -0.001045\n",
       "abortion               -0.001080\n",
       "rape                   -0.001101\n",
       "hospital               -0.001111\n",
       "home                   -0.001117\n",
       "medical                -0.001126\n",
       "south                  -0.001158\n",
       "workers                -0.001170\n",
       "government             -0.001182\n",
       "womankind              -0.001307\n",
       "care                   -0.001326\n",
       "war                    -0.001329\n",
       "legal                  -0.001362\n",
       "vietnamese             -0.001368\n",
       "working                -0.001412\n",
       "city                   -0.001422\n",
       "health                 -0.001477\n",
       "day                    -0.001573\n",
       "office                 -0.001629\n",
       "people                 -0.001649\n",
       "nixon                  -0.001711\n",
       "vietnam                -0.001735\n",
       "cwlu                   -0.001771\n",
       "work                   -0.001835\n",
       "school                 -0.002130\n",
       "union                  -0.002356\n",
       "center                 -0.002617\n",
       "children               -0.003032\n",
       "chicago                -0.003986\n",
       "Name: 2, dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "redstockings_cwlu = pandas.DataFrame(countvec.fit_transform([redstockings_string, cwlu_string]).toarray(), columns=countvec.get_feature_names())\n",
    "redstockings_cwlu['word_count'] = redstockings_cwlu.sum(axis=1)\n",
    "redstockings_cwlu = redstockings_cwlu.iloc[:,0:].div(redstockings_cwlu.word_count, axis=0)\n",
    "redstockings_cwlu.loc[2] = redstockings_cwlu.loc[0] - redstockings_cwlu.loc[1]\n",
    "#The words with the highest difference of proportions are distinct to Redstocking\n",
    "#The words with the lowest (the highest negative) difference of proportions are distinct to CWLU\n",
    "redstockings_cwlu.loc[2].sort_values(axis=0, ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "woman           0.007661\n",
       "man             0.005207\n",
       "women           0.005077\n",
       "life            0.003658\n",
       "know            0.003483\n",
       "world           0.003385\n",
       "like            0.003252\n",
       "sanger          0.003242\n",
       "men             0.003156\n",
       "said            0.003046\n",
       "home            0.002958\n",
       "just            0.002857\n",
       "say             0.002752\n",
       "dont            0.002631\n",
       "little          0.002590\n",
       "way             0.002561\n",
       "think           0.002457\n",
       "things          0.002279\n",
       "want            0.002242\n",
       "sex             0.002238\n",
       "right           0.002030\n",
       "masses          0.001970\n",
       "make            0.001966\n",
       "thing           0.001883\n",
       "good            0.001805\n",
       "business        0.001735\n",
       "law             0.001697\n",
       "case            0.001667\n",
       "control         0.001649\n",
       "birth           0.001642\n",
       "                  ...   \n",
       "association    -0.002184\n",
       "city           -0.002257\n",
       "summer         -0.002284\n",
       "plays          -0.002285\n",
       "various        -0.002373\n",
       "building       -0.002498\n",
       "italian        -0.002509\n",
       "neighborhood   -0.002676\n",
       "evening        -0.002689\n",
       "children       -0.002839\n",
       "room           -0.003008\n",
       "residents      -0.003040\n",
       "mrs            -0.003055\n",
       "clubs          -0.003390\n",
       "held           -0.003445\n",
       "social         -0.003609\n",
       "years          -0.003620\n",
       "work           -0.003649\n",
       "boys           -0.003919\n",
       "house          -0.003928\n",
       "classes        -0.004414\n",
       "mr             -0.004908\n",
       "chicago        -0.005230\n",
       "members        -0.005246\n",
       "year           -0.005418\n",
       "given          -0.005685\n",
       "school         -0.006431\n",
       "miss           -0.009809\n",
       "club           -0.014529\n",
       "hullhouse      -0.017059\n",
       "Name: 2, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Heterodoxy versus Hull House\n",
    "heterodoxy_hullhouse = pandas.DataFrame(countvec.fit_transform([heterodoxy_string, hullhouse_string]).toarray(), columns=countvec.get_feature_names())\n",
    "heterodoxy_hullhouse['word_count'] = heterodoxy_hullhouse.sum(axis=1)\n",
    "heterodoxy_hullhouse = heterodoxy_hullhouse.iloc[:,0:].div(heterodoxy_hullhouse.word_count, axis=0)\n",
    "heterodoxy_hullhouse.loc[2] = heterodoxy_hullhouse.loc[0] - heterodoxy_hullhouse.loc[1]\n",
    "\n",
    "#The words with the highest difference of proportions are distinct to Heterodoxy\n",
    "#The words with the lowest (the highest negative) difference of proportions are distinct to Hull House\n",
    "heterodoxy_hullhouse.loc[2].sort_values(axis=0, ascending=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
